##  merge.all.results.pl
##
##  This Perl program merges the numerical output of svm_classify with the identifiers
##  in the input data, sorting them into positive and negative cases using a threshold of
##  0.0. The file names are derived from the command-line prefix; the threshold and the
##  file-name suffixes are still hard-coded.
##
##  TO RUN PROGRAM:
##
##  perl merge.all.results.pl prefix
##
##  where prefix applies to the .predict and .new.output.txt files.
##
##  INPUT FILES:
##
##    FPRED : "$prefix.predict" -- svm_classify output file
##
##    FIN: "$prefix.new.output.txt", which is the input (along with a model) to svm_classify
##
##  OUTPUT FILES:
##
##    FPOS      : ">$prefix.positive.txt" -- positive cases
##    FNEG      : ">$prefix.negative.txt" -- negative cases
##    POS_FEATS : ">$prefix.pos.features" -- input lines (feature vectors) of the positive cases
##
##  PROGRAMMING NOTES:
##  None
##
##  SYSTEM REQUIREMENTS
##  This program has been successfully run under Mac OS 10.5; it is standard perl
##  so it should also run in Unix or Windows.
##
##  PROVENANCE:
##  Programmer: Philip A. Schrodt
##              Dept of Political Science
##              Pennsylvania State University
##              227 Pond Laboratory
##              University Park, PA, 16802 U.S.A.
##              http://web.ku.edu/keds
##
##  Copyright (c) 2009  Philip A. Schrodt.  All rights reserved.
##
##   Redistribution and use in source and binary forms, with or without modification,
##   are permitted under the terms of the GNU General Public License:
##   http://www.opensource.org/licenses/gpl-license.html
##
##  Programming supported by National Science Foundation Political Science Program Grant
##  SES-0719634 "Improving the Efficiency of Militarized Interstate Dispute Data Collection using
##  Automated Textual Analysis" and SES-0924240, "MID4: Updating the Militarized Dispute Data Set
##  2002-2010."
##
##  Report bugs to: schrodt@ku.edu
##
##  For plausible indenting of this source code, set the tab size in your editor to "2"
##
##  REVISION HISTORY:
##  25-July-08:  Initial version
##  29-Nov-08 :  Generalized with command line prefix
##
##  ----------------------------------------------------------------------------------

# ======== pragmas =========== #

use strict;
use warnings;

# ======== main program =========== #
#
# Reads "$prefix.new.output.txt" and "$prefix.predict" in lock-step (line k of
# one is paired with line k of the other) and writes:
#   $prefix.positive.txt  -- "prediction<TAB>id" for every prediction > 0.0
#   $prefix.negative.txt  -- "prediction<TAB>id" for every other prediction
#   $prefix.pos.features  -- the complete input line of every positive case
# Each output file ends with a one-line summary of counts and proportion.
#
# Exit status: 0 on success; 1 when the prefix argument is missing or too
# short; die() status when a file cannot be opened/closed or when the
# prediction file runs out before the input file does.

# The prefix is mandatory; as before, anything shorter than four characters
# is rejected.  A usage error now exits with a non-zero status.
my $prefix = $ARGV[0];
if (!defined $prefix or length($prefix) < 4) {
  print "prefix option is required to run the program\n";
  exit 1;
}

my $pred_name  = "$prefix.predict";
my $in_name    = "$prefix.new.output.txt";
my $pos_name   = "$prefix.positive.txt";
my $neg_name   = "$prefix.negative.txt";
my $feats_name = "$prefix.pos.features";

# Three-argument open: the mode can never be altered by characters in the
# prefix, and every error message names the file that failed.
open(my $fpred, '<', $pred_name) or die "Can't open input file $pred_name; error $!";
open(my $fin,   '<', $in_name)   or die "Can't open input file $in_name; error $!";

open(my $fpos, '>', $pos_name) or die "Can't open output file $pos_name; error $!";
open(my $fneg, '>', $neg_name) or die "Can't open output file $neg_name; error $!";

# vjd 8/23: file which contains the features vectors for all positives
open(my $pos_feats, '>', $feats_name) or die "Can't open output file $feats_name; error $!";

my $ncase = 0;
my $npos  = 0;
my $nneg  = 0;

while (defined(my $line = <$fin>)) {
  # Strip the newline here and add it back on output, so a final line that
  # lacks one cannot run into the summary line written after the loop.
  chomp $line;

  # A missing prediction means the two files are out of step; classifying
  # the case as negative (the old behaviour) would silently corrupt output.
  my $pred = <$fpred>;
  defined $pred
    or die "$pred_name has fewer lines than $in_name (no prediction for input line "
         . ($ncase + 1) . ")\n";
  $pred =~ s/\r?\n\z//;   # drop the line ending, tolerating CRLF

  ++$ncase;

  # The identifier is everything from the first '#' to the end of the line.
  # With no '#' present the identifier is left empty rather than taking the
  # last character of the line (what substr() does with index()'s -1).
  my $hash_at = index($line, '#');
  my $id = $hash_at >= 0 ? substr($line, $hash_at) : '';

  if ($pred > 0.0) {
    print {$fpos} $pred, "\t", $id, "\n";
    print {$pos_feats} $line, "\n";
    ++$npos;
  }
  else {
    print {$fneg} $pred, "\t", $id, "\n";
    ++$nneg;
  }
}

# Surplus (non-blank) predictions also indicate mismatched files, but every
# input line was still paired with a prediction, so only warn.
while (defined(my $extra = <$fpred>)) {
  next unless $extra =~ /\S/;
  warn "Warning: $pred_name has more predictions than $in_name has lines\n";
  last;
}

# Guard the proportions: an empty input file would otherwise abort here with
# "Illegal division by zero".
my $pos_share = $ncase ? $npos / $ncase : 0;
my $neg_share = $ncase ? $nneg / $ncase : 0;

print {$fpos} "Positive cases: ",$npos,"  Total cases: ", $ncase,"  Proportion: ",$pos_share,"\n";
print {$fneg} "Negative cases: ",$nneg,"  Total cases: ", $ncase,"  Proportion: ",$neg_share,"\n";
# NOTE(review): this summary line also lands in the features file; confirm
# that whatever reads $prefix.pos.features tolerates a non-feature last line.
print {$pos_feats} "Positive cases: ",$npos,"  Total cases: ", $ncase,"  Proportion: ",$pos_share,"\n";

# Buffered write errors surface at close, so every close is checked.
close($fpos)      or die "Can't close output file $pos_name; error $!";
close($fneg)      or die "Can't close output file $neg_name; error $!";
close($pos_feats) or die "Can't close output file $feats_name; error $!";
close($fin)       or die "Can't close input file $in_name; error $!";
close($fpred)     or die "Can't close input file $pred_name; error $!";
print "Program has finished!\n";
